# Packages ----
# Use library() rather than require(): library() stops with an error when a
# package is missing, while require() merely returns FALSE and lets the
# script continue in a broken state.
library(ggplot2)
library(plotly)     # masks ggplot2::last_plot, stats::filter, graphics::layout
library(geojsonio)  # masks base::pretty
library(sp)
library(sf)         # links to GEOS / GDAL / PROJ system libraries
library(rvest)      # also attaches xml2
library(RSelenium)
library(htmltools)
library(ggmap)      # masks plotly::wind; geocoding requires a Google API key
# NOTE(review): dplyr verbs used below (transmute, select, %>%) are never
# attached explicitly and resolve only via plotly's re-exports — consider an
# explicit library(dplyr) to make that dependency visible.
# Data ----

# Download the NJ county boundaries (GeoJSON) from the NJ open-data portal.
geojson_url <- "https://opendata.arcgis.com/datasets/5f45e1ece6e14ef5866974a7b57d3b95_1.geojson"
geojson_file <- "NJ_counties.geojson"
download.file(geojson_url, geojson_file)

# Read the downloaded GeoJSON into an sf object.
NJ_Counties <- geojson_sf(geojson_file)

# Tidy up helper variables (the original assigned `file` twice and used `=`
# for assignment; both fixed here).
rm(geojson_url, geojson_file)
# Clean Data ----
# Keep (and partly rename) only the columns of interest; transmute() drops
# every other attribute column. Bare names keep their original column name.
NJ_Counties_Cleaned <-
  NJ_Counties %>%
  transmute(
    county = COUNTY,
    CO,
    pop = POP2010,
    popdensity = POPDEN2010,
    Shape_Length,
    Shape_Area,
    GNIS
  )
# Get the page source from the hospital-directory website.
# NOTE(review): presumably the directory is rendered by JavaScript, hence the
# Selenium-driven browser instead of a plain HTTP fetch — confirm.
gc()
driver <- rsDriver(browser = "firefox", port = 44454L)
remote_driver <- driver[["client"]]
remote_driver$navigate("https://www.childrens-specialized.org/locations-directory/?")
page <- remote_driver$getPageSource()
# Retrieve name, address and opening hours for each of the 15 locations in
# the directory. Fixes over the original: the page source is parsed once
# (not re-parsed via read_html() on every iteration), the inner loop no
# longer reuses the outer index `i`, weekday hours are collected in a named
# list instead of assign()-ed globals, and rows are preallocated and bound
# once instead of rbind()-grown in the loop.
xpath_prefix <- "/html/body/div[1]/div/div/div[2]/div/div[2]/div["
xpath_suffix <- "]/div/div[2]/article"

# Parse the rendered page source a single time.
doc <- read_html(page[[1]])

weekdays_order <- c("Monday", "Tuesday", "Wednesday", "Thursday",
                    "Friday", "Saturday", "Sunday")

rows <- vector("list", 15L)
for (i in seq_len(15L)) {
  article_xpath <- paste0(xpath_prefix, i, xpath_suffix)
  Node <- html_nodes(doc, xpath = article_xpath)

  name <-
    Node[[1]] %>%
    html_node("h2") %>%
    html_text()
  # Collapse the newline-and-indent runs inside the address into spaces.
  address <-
    Node[[1]] %>%
    html_node("h3") %>%
    html_text() %>%
    gsub(pattern = "\n *", replacement = " ", x = .)

  # Hours per weekday: the weekday name is encoded in the div's class
  # attribute as e.g. "Monday-Hours".
  hours <- list()
  for (j in seq_len(7L)) {
    day_xpath <- paste0(article_xpath, "/div[", j, "]")
    day_nodes <- html_nodes(doc, xpath = day_xpath)
    day <-
      day_nodes %>%
      html_attr("class") %>%
      grep("-Hours", x = ., value = TRUE) %>%
      gsub("-Hours", "", x = .)
    times <-
      day_nodes %>%
      html_node("h3") %>%
      html_text()
    hours[[day]] <- times
  }

  rows[[i]] <- data.frame(name, address, hours[weekdays_order])
}
Hosinfo <- do.call(rbind, rows)
# Write csv file ----
# Persist the scraped directory.
write.csv(Hosinfo, "Hospitals.csv")

# Data Wrangling ----
Hosinfo <- read.csv("Hospitals.csv")
# (A `pattern` regex for extracting time ranges was defined here in the
# original but never used before being rm()'d — dead code, removed.)

# Geocode each hospital address. mutate_geocode() requires a registered
# Google API key; see ggmap::register_google().
Hosloc <-
  Hosinfo %>%
  select(name, address) %>%
  mutate_geocode(address)

write.csv(Hosloc, "Hospitalsloc.csv")